Grafici andamento Covid-19

Data e Ora ultimo aggiornamento

In [1]:
import datetime

print(datetime.datetime.today())
2020-11-17 08:39:47.867575
In [2]:
from IPython.display import HTML

HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Clicca qui per nascondere / mostrare il codice"></form>''')
Out[2]:
In [3]:
import pandas as pd
import numpy as np
from datetime import datetime
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px


import warnings
warnings.filterwarnings('ignore')
In [4]:
url_r = "https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-regioni/dpc-covid19-ita-regioni.csv"
data_region = pd.read_csv(url_r)
#print(data_region.dtypes)
#print(data_region.isnull().sum())
#print(data_region.shape)
#print(data_region.head())

url_p = "https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-province/dpc-covid19-ita-province.csv"
data_province = pd.read_csv(url_p)
#print(data_province.dtypes)
#print(data_province.isnull().sum())
#print(data_province.shape)
#print(data_province.head())

url_n = "https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-andamento-nazionale/dpc-covid19-ita-andamento-nazionale.csv"
data_national = pd.read_csv(url_n)
#print(data_national.dtypes)
#print(data_national.isnull().sum())
#print(data_national.shape)
#print(data_national.head())

Tabella dei dati degli ultimi giorni

In [5]:
#add daily cases/deaths/recovered columns
data_national['new_cases'] = data_national['totale_casi'].diff()
data_national['new_deaths'] = data_national['deceduti'].diff()
data_national['new_recovered'] = data_national['dimessi_guariti'].diff()
data_national['new_swabs'] = data_national['tamponi'].diff()
data_national['new_unique_tested'] = data_national['casi_testati'].diff()
#add a day/day-1 percentage change for new_cases
data_national['daily_cases_perc_change'] = round((data_national['new_cases'].pct_change(1))*100,2)
data_national['daily_swab_perc_change'] = round((data_national['new_swabs'].pct_change(1))*100,2)
data_national['daily_unique_tested_perc_change'] = round((data_national['new_unique_tested'].pct_change(1))*100,2)
#detect ratio
data_national['detect_ratio_swabs'] = round((data_national['new_cases'] / data_national['new_swabs'])*100,2)
data_national['detect_ratio_cases'] = round((data_national['new_cases'] / data_national['new_unique_tested'])*100,2)
data_national.tail(10)
Out[5]:
data stato ricoverati_con_sintomi terapia_intensiva totale_ospedalizzati isolamento_domiciliare totale_positivi variazione_totale_positivi nuovi_positivi dimessi_guariti ... new_cases new_deaths new_recovered new_swabs new_unique_tested daily_cases_perc_change daily_swab_perc_change daily_unique_tested_perc_change detect_ratio_swabs detect_ratio_cases
257 2020-11-07T17:00:00 ITA 25109 2634 27743 504793 532536 33418 39811 328891 ... 39809.0 425.0 5966.0 231673.0 137646.0 5.31 -1.10 2.29 17.18 28.92
258 2020-11-08T17:00:00 ITA 26440 2749 29189 529447 558636 26100 32616 335074 ... 32614.0 331.0 6183.0 191144.0 119249.0 -18.07 -17.49 -13.37 17.06 27.35
259 2020-11-09T17:00:00 ITA 27636 2849 30485 542849 573334 14698 25271 345289 ... 25269.0 356.0 10215.0 147725.0 88701.0 -22.52 -22.72 -25.62 17.11 28.49
260 2020-11-10T17:00:00 ITA 28633 2971 31604 558506 590110 16776 35098 363023 ... 35090.0 580.0 17734.0 217758.0 129814.0 38.87 47.41 46.35 16.11 27.03
261 2020-11-11T17:00:00 ITA 29444 3081 32525 580833 613358 23248 32961 372113 ... 32961.0 623.0 9090.0 225640.0 126410.0 -6.07 3.62 -2.62 14.61 26.07
262 2020-11-12T17:00:00 ITA 29873 3170 33043 602011 635054 21696 37978 387758 ... 37977.0 636.0 15645.0 234672.0 133478.0 15.22 4.00 5.59 16.18 28.45
263 2020-11-13T17:00:00 ITA 30914 3230 34144 629782 663926 28872 40902 399238 ... 40902.0 550.0 11480.0 254908.0 144875.0 7.70 8.62 8.54 16.05 28.23
264 2020-11-14T17:00:00 ITA 31398 3306 34704 653731 688435 24509 37255 411434 ... 37249.0 544.0 12196.0 227695.0 135117.0 -8.93 -10.68 -6.74 16.36 27.57
265 2020-11-15T17:00:00 ITA 32047 3422 35469 677021 712490 24055 33979 420810 ... 33977.0 546.0 9376.0 195275.0 110731.0 -8.78 -14.24 -18.05 17.40 30.68
266 2020-11-16T17:00:00 ITA 32536 3492 36028 681756 717784 5294 27354 442364 ... 27352.0 504.0 21554.0 152663.0 89082.0 -19.50 -21.82 -19.55 17.92 30.70

10 rows × 27 columns

In [6]:
#regional data preparation

data_region_Abruzzo = data_region[(data_region['denominazione_regione'] == 'Abruzzo')]
data_region_Basilicata = data_region[(data_region['denominazione_regione'] == 'Basilicata')]
data_region_Bolzano = data_region[(data_region['denominazione_regione'] == 'P.A. Bolzano')]
data_region_Calabria = data_region[(data_region['denominazione_regione'] == 'Calabria')]
data_region_Campania = data_region[(data_region['denominazione_regione'] == 'Campania')]
data_region_EmiliaR = data_region[(data_region['denominazione_regione'] == 'Emilia-Romagna')]
data_region_Friuli = data_region[(data_region['denominazione_regione'] == 'Friuli Venezia Giulia')]
data_region_Lazio = data_region[(data_region['denominazione_regione'] == 'Lazio')]
data_region_Liguria = data_region[(data_region['denominazione_regione'] == 'Liguria')]
data_region_Lombardia = data_region[(data_region['denominazione_regione'] == 'Lombardia')]
data_region_Marche = data_region[(data_region['denominazione_regione'] == 'Marche')]
data_region_Molise = data_region[(data_region['denominazione_regione'] == 'Molise')]
data_region_Piemonte = data_region[(data_region['denominazione_regione'] == 'Piemonte')]
data_region_Puglia = data_region[(data_region['denominazione_regione'] == 'Puglia')]
data_region_Sardegna = data_region[(data_region['denominazione_regione'] == 'Sardegna')]
data_region_Sicilia = data_region[(data_region['denominazione_regione'] == 'Sicilia')]
data_region_Toscana = data_region[(data_region['denominazione_regione'] == 'Toscana')]
data_region_Trento = data_region[(data_region['denominazione_regione'] == 'P.A. Trento')]
data_region_Umbria = data_region[(data_region['denominazione_regione'] == 'Umbria')]
data_region_VAosta = data_region[(data_region['denominazione_regione'] == "Valle d'Aosta")]
data_region_Veneto = data_region[(data_region['denominazione_regione'] == 'Veneto')]

def region_apply(region):
    for x in region:
        x['new_cases'] =  x['totale_casi'].diff()
        x['new_deaths'] = x['deceduti'].diff()
        x['new_recovered'] = x['dimessi_guariti'].diff()
        x['new_swabs'] = x['tamponi'].diff()
        #add a day/day-1 percentage change for new_cases
        x['daily_cases_perc_change'] = round((x['new_cases'].pct_change(1))*100,2)
        x['daily_swab_perc_change'] = round((x['new_swabs'].pct_change(1))*100,2)
        #detect ratio
        x['detect_ratio'] = round((x['new_cases'] / x['new_swabs'])*100,2)
        return; 

region_apply([data_region_Abruzzo])  
region_apply([data_region_Basilicata]) 
region_apply([data_region_Bolzano])
region_apply([data_region_Calabria])
region_apply([data_region_Campania])
region_apply([data_region_EmiliaR])
region_apply([data_region_Friuli])
region_apply([data_region_Lazio])
region_apply([data_region_Liguria])
region_apply([data_region_Lombardia])
region_apply([data_region_Marche])
region_apply([data_region_Molise])
region_apply([data_region_Piemonte])
region_apply([data_region_Puglia])
region_apply([data_region_Sardegna])
region_apply([data_region_Sicilia])
region_apply([data_region_Toscana])
region_apply([data_region_Trento])
region_apply([data_region_VAosta])
region_apply([data_region_Veneto])
In [7]:
data_region_Nordovest = data_region[(data_region.denominazione_regione.isin(['Piemonte', 'Lombardia', 'Liguria',"Valle d'Aosta"]))]
data_region_Nordest = data_region[(data_region.denominazione_regione.isin(['Emilia-Romagna', 'P.A. Bolzano', 'P.A. Trento', 'Veneto', 'Friuli Venezia Giulia']))]
data_region_Centro = data_region[(data_region.denominazione_regione.isin(['Toscana', 'Umbria', 'Marche', 'Lazio']))]
data_region_Sudisole = data_region[(data_region.denominazione_regione.isin(['Abruzzo', 'Molise', 'Campania', 'Puglia', 'Basilicata', 'Calabria', 'Sicilia', 'Sardegna']))]

cases_Nordovest = data_region_Nordovest.groupby('data').sum()
region_apply([cases_Nordovest])  
cases_Nordovest['data'] = cases_Nordovest.index

cases_Nordest = data_region_Nordest.groupby('data').sum()
region_apply([cases_Nordest])  
cases_Nordest['data'] = cases_Nordest.index

cases_Centro = data_region_Centro.groupby('data').sum()
region_apply([cases_Centro])  
cases_Centro['data'] = cases_Centro.index

cases_Sudisole = data_region_Sudisole.groupby('data').sum()
region_apply([cases_Sudisole])  
cases_Sudisole['data'] = cases_Sudisole.index

cases_Nordovest.tail(5)
Out[7]:
codice_regione lat long ricoverati_con_sintomi terapia_intensiva totale_ospedalizzati isolamento_domiciliare totale_positivi variazione_totale_positivi nuovi_positivi ... tamponi casi_testati new_cases new_deaths new_recovered new_swabs daily_cases_perc_change daily_swab_perc_change detect_ratio data
data
2020-11-12T17:00:00 13 180.689065 33.123883 13402 1255 14657 208333 222990 4431 15258 ... 5194497 3144551.0 15258.0 301.0 10526.0 74291.0 23.45 0.45 20.54 2020-11-12T17:00:00
2020-11-13T17:00:00 13 180.689065 33.123883 13730 1279 15009 220162 235171 12181 17204 ... 5281019 3191307.0 17204.0 200.0 4823.0 86522.0 12.75 16.46 19.88 2020-11-13T17:00:00
2020-11-14T17:00:00 13 180.689065 33.123883 14071 1298 15369 227758 243127 7956 13936 ... 5343248 3227469.0 13936.0 231.0 5749.0 62229.0 -19.00 -28.08 22.39 2020-11-14T17:00:00
2020-11-15T17:00:00 13 180.689065 33.123883 14243 1336 15579 236224 251803 8676 12707 ... 5401092 3254515.0 12707.0 255.0 3776.0 57844.0 -8.82 -7.05 21.97 2020-11-15T17:00:00
2020-11-16T17:00:00 13 180.689065 33.123883 14507 1360 15867 227579 243446 -8357 8068 ... 5437825 3272566.0 8068.0 190.0 16235.0 36733.0 -36.51 -36.50 21.96 2020-11-16T17:00:00

5 rows × 25 columns

Andamento Nazionale

In [8]:
fig2 = px.bar(data_national, x='data', y='totale_casi',
             hover_data=['totale_casi'], color='totale_casi',
             height=600, color_continuous_scale='Sunsetdark')

fig2.update_layout(title_text='Total COVID19 Cases - Italy',
                  xaxis_rangeslider_visible=True)
fig2.update_yaxes(tick0=0, dtick=25000,  gridcolor='White')
fig2.show()
In [9]:
fig22 = px.bar(data_national, x='data', y='totale_positivi',
             hover_data=['totale_positivi'], color='totale_positivi',
             height=600, color_continuous_scale='Sunsetdark')

fig22.update_layout(title_text='Active COVID19 Cases - Italy',
                  xaxis_rangeslider_visible=True)
fig22.update_yaxes(tick0=0, dtick=10000,  gridcolor='White')
fig22.show()

Andamento per zone d'Italia

In [10]:
fig = go.Figure()

fig.add_trace(go.Scatter(mode = "lines+markers", x=cases_Nordovest['data'], y=cases_Nordovest['new_cases'], name="North-West",
                         line_color='red'))
fig.add_trace(go.Scatter(mode = "lines+markers", x=cases_Nordest['data'], y=cases_Nordest['new_cases'], name="North-East",
                         line_color='green'))
fig.add_trace(go.Scatter(mode = "lines+markers", x=cases_Centro['data'], y=cases_Centro['new_cases'], name="Center",
                         line_color='darkviolet'))
fig.add_trace(go.Scatter(mode = "lines+markers", x=cases_Sudisole['data'], y=cases_Sudisole['new_cases'], name="South and Islands",
                         line_color='darkblue'))
fig.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['new_cases'], name="All Italy",
                         line_color='deepskyblue'))

fig.update_layout(title_text='Daily Coronavirus new cases - All Italy and Regions',
                  xaxis_rangeslider_visible=True)


fig.show()
In [11]:
fig = go.Figure()

fig.add_trace(go.Scatter(mode = "lines+markers", x=cases_Nordovest['data'], y=cases_Nordovest['new_swabs'], name="North-West",
                         line_color='red'))
fig.add_trace(go.Scatter(mode = "lines+markers", x=cases_Nordest['data'], y=cases_Nordest['new_swabs'], name="North-East",
                         line_color='green'))
fig.add_trace(go.Scatter(mode = "lines+markers", x=cases_Centro['data'], y=cases_Centro['new_swabs'], name="Center",
                         line_color='darkviolet'))
fig.add_trace(go.Scatter(mode = "lines+markers", x=cases_Sudisole['data'], y=cases_Sudisole['new_swabs'], name="South and Islands",
                         line_color='darkblue'))
fig.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['new_swabs'], name="All Italy",
                         line_color='deepskyblue'))

fig.update_layout(title_text='Daily swabs - All Italy and Regions',
                  xaxis_rangeslider_visible=True)


fig.show()
In [12]:
fig = go.Figure()


fig.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['new_deaths'], name="Daily Deaths",
                         line_color='red'))
fig.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['new_recovered'], name="Daily Recovered",
                         line_color='green'))



fig.update_layout(title_text='Daily Coronavirus Deaths and Recoveries - Italy',
                  xaxis_rangeslider_visible=True)

fig.update_yaxes(tick0=0, dtick=500)

fig.show()
In [13]:
fig.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['new_cases'], name="Daily Cases",
                         line_color='deepskyblue'))
fig.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['new_swabs'], name="Daily swabs",
                         line_color='purple'))
fig.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['new_unique_tested'], name="Daily unique tested",
                         line_color='red'))
fig.update_layout(title_text='Daily Coronavirus new cases and swabs - Italy',
                  xaxis_rangeslider_visible=True)

fig.update_yaxes(tick0=0, dtick=10000)

fig.show()
In [14]:
fig3 = go.Figure()

fig3.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['detect_ratio_swabs'], name="Daily detect ratio - Italy",
                         line_color='purple'))
fig3.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['detect_ratio_cases'], name="Daily unique detect ratio - Italy",
                         line_color='red'))


fig3.update_layout(title_text="Daily Swabs detect ratio - Italy",
                  xaxis_rangeslider_visible=True)
fig3.update_yaxes(dtick=5)
In [15]:
fig4 = go.Figure()

#fig4.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['terapia_intensiva'], name="Daily total UTI - Italy",
#                         line_color='blue'))
fig4.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['totale_ospedalizzati'], name="Daily total Hospital - Italy",
                         line_color='green'))

fig4.update_layout(title_text="Daily Total Hospital - Italy",
                  xaxis_rangeslider_visible=True)
fig4.update_yaxes(dtick=2000)
In [16]:
fig5 = go.Figure()

fig5.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['terapia_intensiva'], name="Daily total UTI - Italy",
                         line_color='blue'))

fig5.update_layout(title_text="Daily Total UTI - Italy",
                  xaxis_rangeslider_visible=True)
fig5.update_yaxes(dtick=200)
In [17]:
fig6 = go.Figure()

fig6.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['daily_cases_perc_change'], name="Daily cases percentual change - Italy",
                         line_color='purple'))
fig6.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['daily_swab_perc_change'], name="Daily swab percentual change - Italy",
                         line_color='red'))


fig6.update_layout(title_text="Daily v- Italy",
                  xaxis_rangeslider_visible=True)
fig6.update_yaxes(dtick=40)
In [18]:
fig7 = go.Figure()

fig7.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['new_recovered'], name="Daily new recovered - Italy",
                         line_color='purple'))
fig7.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['new_deaths'], name="Daily new deaths - Italy",
                         line_color='red'))
fig7.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['new_cases'], name="Daily new cases - Italy",
                         line_color='green'))

fig7.update_layout(title_text="Daily change - Italy",
                  xaxis_rangeslider_visible=True)
fig7.update_yaxes(dtick=500)
In [19]:
fig8 = go.Figure()

fig8.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['terapia_intensiva'], name="Daily total UTI - Italy",
                         line_color='purple'))
fig8.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['new_deaths'], name="Daily new deaths - Italy",
                         line_color='red'))


fig8.update_layout(title_text="Daily UTI vs  Death - Italy",
                  xaxis_rangeslider_visible=True)
fig8.update_yaxes(dtick=200)

Andamento Provincia di Genova

In [20]:
data_ge=data_province[data_province['sigla_provincia']=='GE']
#print(data_ge)
fig9 = go.Figure()

fig9.add_trace(go.Scatter(mode = "lines+markers", x=data_ge['data'], y=data_ge['totale_casi'], name="Daily cases GE - Italy",
                         line_color='red'))



fig9.update_layout(title_text="Daily cases GE - Italy",
                  xaxis_rangeslider_visible=True)
fig9.update_yaxes(dtick=1000)

Andamento Regionale

In [21]:
fig10 = go.Figure()

fig10.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['totale_casi'], name="Daily cases Liguria - Italy",
                         line_color='red'))



fig10.update_layout(title_text="Daily cases Liguria - Italy",
                  xaxis_rangeslider_visible=True)
fig10.update_yaxes(dtick=1000)
In [22]:
fig11 = go.Figure()

fig11.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['terapia_intensiva'], name="Daily UTI Liguria - Italy",
                         line_color='red'))
fig11.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['ricoverati_con_sintomi'], name="Daily hospital Liguria - Italy",
                         line_color='purple'))
fig11.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['new_deaths'], name="Daily new deaths Liguria - Italy",
                         line_color='green'))
fig11.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['nuovi_positivi'], name="Daily new cases Liguria - Italy",
                         line_color='blue'))
#fig11.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['new_swabs'], name="Daily new swabs Liguria - Italy",
#line_color='black'))
fig11.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['new_recovered'], name="Daily new recovered Liguria - Italy",
                         line_color='yellow'))

fig11.update_layout(title_text="Daily change Liguria - Italy",
                  xaxis_rangeslider_visible=True)
fig11.update_yaxes(dtick=100)
In [23]:
fig12 = go.Figure()

#fig11.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['daily_cases_perc_change'], name="Daily UTI Liguria - Italy",
#                         line_color='red'))
#fig11.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['daily_swab_perc_change'], name="Daily hospital Liguria - Italy",
#                         line_color='purple'))
fig12.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['detect_ratio'], name="Daily detect  ratio Liguria - Italy",
                         line_color='green'))
#fig11.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['nuovi_positivi'], name="Daily new cases Liguria - Italy",
#                         line_color='blue'))
#fig11.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['new_swabs'], name="Daily new swabs Liguria - Italy",
#line_color='black'))
#fig11.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['new_recovered'], name="Daily new recovered Liguria - Italy",
#                         line_color='yellow'))

fig12.update_layout(title_text="Daily detect ratio Liguria - Italy",
                  xaxis_rangeslider_visible=True)
fig12.update_yaxes(dtick=20)
In [24]:
fig13 = go.Figure()

fig13.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['daily_cases_perc_change'], name="Daily cases perc change Liguria - Italy",
                         line_color='red'))
fig13.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['daily_swab_perc_change'], name="Daily swab perc change Liguria - Italy",
                         line_color='purple'))


fig13.update_layout(title_text="Daily percentual change Liguria - Italy",
                  xaxis_rangeslider_visible=True)
fig13.update_yaxes(dtick=100)
In [25]:
#print(data_national.dtypes)
In [26]:
import pandas as pd
import numpy as np
import itertools
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import plotly.express as px
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller, acf, pacf,arma_order_select_ic
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima_model import ARIMA
import warnings
#Librerie di base
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import pyplot
from matplotlib.pyplot import figure
import plotly.tools as tls
import math
import statistics as st
import seaborn as sns 
from io import StringIO
import plotly as py
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import pylab as pl
import scipy.stats as scs
from itertools import product                    # some useful functions
from tqdm import tqdm_notebook
import time
import timeit
import pytest
import os
import pyarrow

#Pacchetto Sklearn
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import log_loss
from sklearn.metrics import confusion_matrix,classification_report
from sklearn.metrics import mean_squared_error
from sklearn.utils import shuffle
from sklearn.svm import SVC

#Per Modello XGBoost
import xgboost as xgb
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score, KFold
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt 

import category_encoders as ce
warnings.simplefilter('ignore')
In [27]:
ds=pd.DataFrame()
ds['data']=data_national['data']
ds['new']=data_national['new_cases']
ds.set_index('data')
ds = ds.dropna()
In [28]:
model = ARIMA(ds['new'], order=(1,1,4))
model_fit = model.fit(disp=0)
print(model_fit.summary())
# plot residual errors
residuals = pd.DataFrame(model_fit.resid)
residuals.plot()
pyplot.show()
residuals.plot(kind='kde')
pyplot.show()
print(residuals.describe())
forecast = model_fit.forecast(steps=10)[0]
print(forecast)
                             ARIMA Model Results                              
==============================================================================
Dep. Variable:                  D.new   No. Observations:                  265
Model:                 ARIMA(1, 1, 4)   Log Likelihood               -2269.247
Method:                       css-mle   S.D. of innovations           1256.236
Date:                Tue, 17 Nov 2020   AIC                           4552.493
Time:                        08:40:02   BIC                           4577.551
Sample:                             1   HQIC                          4562.561
                                                                              
===============================================================================
                  coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------
const         113.7399     68.177      1.668      0.095     -19.884     247.364
ar.L1.D.new     0.1101      0.095      1.165      0.244      -0.075       0.295
ma.L1.D.new    -0.1322      0.070     -1.887      0.059      -0.269       0.005
ma.L2.D.new     0.2273      0.038      5.919      0.000       0.152       0.303
ma.L3.D.new     0.3409      0.048      7.051      0.000       0.246       0.436
ma.L4.D.new    -0.6549      0.041    -15.990      0.000      -0.735      -0.575
                                    Roots                                    
=============================================================================
                  Real          Imaginary           Modulus         Frequency
-----------------------------------------------------------------------------
AR.1            9.0811           +0.0000j            9.0811            0.0000
MA.1           -1.1019           -0.0000j            1.1019           -0.5000
MA.2            0.1489           -1.0119j            1.0228           -0.2267
MA.3            0.1489           +1.0119j            1.0228            0.2267
MA.4            1.3246           -0.0000j            1.3246           -0.0000
-----------------------------------------------------------------------------
                 0
count   265.000000
mean     -1.891973
std    1259.641977
min   -6287.483354
25%    -298.677313
50%     -87.884308
75%     140.973102
max    7466.258572
[28212.58639638 26846.77497334 27077.85525045 29936.61901886
 30352.63613541 30499.66229394 30617.06764205 30731.21119869
 30844.99557261 30958.74039396]
In [ ]:
 
In [29]:
X = ds['new'].values
size = int(len(X) * 0.999)
train, test = X[0:size], X[size:len(X)]
history = [x for x in train]
predictions = list()
for t in range(len(test)):
	model = ARIMA(history, order=(1,1,4))
	model_fit = model.fit(disp=0)
	output = model_fit.forecast()
	yhat = output[0]
	predictions.append(yhat)
	obs = test[t]
	history.append(obs)
	print('predicted=%f, expected=%f' % (yhat, obs))
error = mean_squared_error(test, predictions)
print('Test MSE: %.3f' % error)
# plot
#pyplot.plot(test)
#pyplot.plot(predictions, color='red')
#pyplot.show()
predicted=31970.871280, expected=27352.000000
Test MSE: 21333971.903
In [30]:
forecast = model_fit.forecast(steps=2)[0]
print(forecast)
pyplot.plot(X)
pyplot.plot(forecast, color='red')
pyplot.show()
[31970.87128017 31826.88533112]
In [31]:
'''
import warnings
from pandas import read_csv
from pandas import datetime
from statsmodels.tsa.arima_model import ARIMA
from sklearn.metrics import mean_squared_error

# evaluate an ARIMA model for a given order (p,d,q)
def evaluate_arima_model(X, arima_order):
	# prepare training dataset
	train_size = int(len(X) * 0.99)
	train, test = X[0:train_size], X[train_size:]
	history = [x for x in train]
	# make predictions
	predictions = list()
	for t in range(len(test)):
		model = ARIMA(history, order=arima_order)
		model_fit = model.fit(disp=0)
		yhat = model_fit.forecast()[0]
		predictions.append(yhat)
		history.append(test[t])
	# calculate out of sample error
	error = mean_squared_error(test, predictions)
	return error

# evaluate combinations of p, d and q values for an ARIMA model
def evaluate_models(dataset, p_values, d_values, q_values):
	#dataset = dataset.astype('float32')
	best_score, best_cfg = float("inf"), None
	for p in p_values:
		for d in d_values:
			for q in q_values:
				order = (p,d,q)
				try:
					mse = evaluate_arima_model(dataset, order)
					if mse < best_score:
						best_score, best_cfg = mse, order
					print('ARIMA%s MSE=%.3f' % (order,mse))
				except:
					continue
	print('Best ARIMA%s MSE=%.3f' % (best_cfg, best_score))

# load dataset


# evaluate parameters
p_values = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
d_values = range(0, 10)
q_values = range(0, 10)
#warnings.filterwarnings("ignore")
evaluate_models(ds['new'].values, p_values, d_values, q_values)
'''
Out[31]:
'\nimport warnings\nfrom pandas import read_csv\nfrom pandas import datetime\nfrom statsmodels.tsa.arima_model import ARIMA\nfrom sklearn.metrics import mean_squared_error\n\n# evaluate an ARIMA model for a given order (p,d,q)\ndef evaluate_arima_model(X, arima_order):\n\t# prepare training dataset\n\ttrain_size = int(len(X) * 0.99)\n\ttrain, test = X[0:train_size], X[train_size:]\n\thistory = [x for x in train]\n\t# make predictions\n\tpredictions = list()\n\tfor t in range(len(test)):\n\t\tmodel = ARIMA(history, order=arima_order)\n\t\tmodel_fit = model.fit(disp=0)\n\t\tyhat = model_fit.forecast()[0]\n\t\tpredictions.append(yhat)\n\t\thistory.append(test[t])\n\t# calculate out of sample error\n\terror = mean_squared_error(test, predictions)\n\treturn error\n\n# evaluate combinations of p, d and q values for an ARIMA model\ndef evaluate_models(dataset, p_values, d_values, q_values):\n\t#dataset = dataset.astype(\'float32\')\n\tbest_score, best_cfg = float("inf"), None\n\tfor p in p_values:\n\t\tfor d in d_values:\n\t\t\tfor q in q_values:\n\t\t\t\torder = (p,d,q)\n\t\t\t\ttry:\n\t\t\t\t\tmse = evaluate_arima_model(dataset, order)\n\t\t\t\t\tif mse < best_score:\n\t\t\t\t\t\tbest_score, best_cfg = mse, order\n\t\t\t\t\tprint(\'ARIMA%s MSE=%.3f\' % (order,mse))\n\t\t\t\texcept:\n\t\t\t\t\tcontinue\n\tprint(\'Best ARIMA%s MSE=%.3f\' % (best_cfg, best_score))\n\n# load dataset\n\n\n# evaluate parameters\np_values = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]\nd_values = range(0, 10)\nq_values = range(0, 10)\n#warnings.filterwarnings("ignore")\nevaluate_models(ds[\'new\'].values, p_values, d_values, q_values)\n'
In [32]:
#Best ARIMA(1, 1, 4) MSE=154507.826
In [33]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.tsa.api import VAR
In [34]:
dat= data_national.drop(['note','stato','variazione_totale_positivi','daily_cases_perc_change','daily_swab_perc_change',
                        'daily_unique_tested_perc_change','detect_ratio_swabs','detect_ratio_cases'], axis = 1).fillna(999999999)
#dat=data_national.fillna(-9999)
dat.set_index('data')
dat = dat.drop(['data'], axis = 1)
dat.dtypes
Out[34]:
ricoverati_con_sintomi            int64
terapia_intensiva                 int64
totale_ospedalizzati              int64
isolamento_domiciliare            int64
totale_positivi                   int64
nuovi_positivi                    int64
dimessi_guariti                   int64
deceduti                          int64
casi_da_sospetto_diagnostico    float64
casi_da_screening               float64
totale_casi                       int64
tamponi                           int64
casi_testati                    float64
new_cases                       float64
new_deaths                      float64
new_recovered                   float64
new_swabs                       float64
new_unique_tested               float64
dtype: object
In [35]:
model = VAR(dat)
model_fit = model.fit()
#model_fit.summary()
In [36]:
pred = model_fit.forecast(model_fit.y, steps=5)
print(pred)
[[ 3.27827908e+04  3.56955531e+03  3.63523461e+04  6.94246700e+05
   7.30599046e+05  3.08608601e+04  4.59690974e+05  4.64570464e+04
  -1.15548855e+08 -1.15875406e+08  1.23674707e+06  1.92420425e+07
   6.32129224e+07  3.08660690e+04  7.24046383e+02  1.73269745e+04
   2.10993518e+05  7.34957470e+04]
 [ 3.30512315e+04  3.66827226e+03  3.67195038e+04  7.06561115e+05
   7.43280619e+05  3.17286551e+04  4.77985499e+05  4.71971914e+04
  -1.00393754e+08 -1.00720068e+08  1.26846269e+06  1.94553381e+07
   1.04871867e+08  3.17156196e+04  7.40144971e+02  1.82945241e+04
   2.13295565e+05  5.17375599e+07]
 [ 3.33618409e+04  3.77737934e+03  3.71392202e+04  7.18316613e+05
   7.55455833e+05  3.14689676e+04  4.96494552e+05  4.79682966e+04
  -8.68752098e+07 -8.72018947e+07  1.29991769e+06  1.96650266e+07
   1.47291646e+08  3.14549979e+04  7.71105200e+02  1.85090530e+04
   2.09688497e+05  9.32990520e+07]
 [ 3.36553440e+04  3.89352930e+03  3.75488733e+04  7.28422457e+05
   7.65971330e+05  3.05372263e+04  5.15697303e+05  4.87716280e+04
  -7.23037710e+07 -7.26304144e+07  1.33043895e+06  1.98671534e+07
   1.94248784e+08  3.05212601e+04  8.03331415e+02  1.92027515e+04
   2.02126779e+05  1.35628263e+08]
 [ 3.39085674e+04  4.01502045e+03  3.79235878e+04  7.36602628e+05
   7.74526216e+05  2.93727123e+04  5.35654791e+05  4.96150761e+04
  -6.11986591e+07 -6.15258082e+07  1.35979448e+06  2.00625623e+07
   2.47171545e+08  2.93555364e+04  8.43448093e+02  1.99574880e+04
   1.95408972e+05  1.82503402e+08]]
In [37]:
# model fitting
#model = VAR(dat)
#results = model.fit(maxlags=30, ic='aic')
#results.summary()
In [38]:
# forecasting
#lag_order = results.k_ar
#results.forecast(x_train4.values[-lag_order:], 5)
In [39]:
# plotting
plt.figure(figsize=(300, 300))
model_fit.plot_forecast(10)
Out[39]:
<Figure size 21600x21600 with 0 Axes>
In [40]:
# Evaluation
#fevd = model_fit.fevd(5)
#fevd.summary()
In [ ]: